from bs4 import BeautifulSoup
"""
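The default encoding used by Python's open() depends on the platform. On
Windows with a Chinese locale the default is gbk, so reading a UTF-8 encoded
file without passing an explicit encoding fails with an error like:
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa2 in position 142: illegal multibyte sequence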
"""
# Read the sample page. The encoding is passed explicitly so the result does
# not depend on the platform's default text encoding.
with open("./test.html", "r", encoding="utf-8") as fin:
    html_doc = fin.read()

soup = BeautifulSoup(html_doc, "html.parser")

# find() returns None when nothing matches, so fail with a clear message
# instead of an AttributeError on the next line.
div_node = soup.find("div", id="content")
if div_node is None:
    raise SystemExit('no <div id="content"> element found in ./test.html')

# Every anchor nested anywhere inside the content div.
links = div_node.find_all("a")

for link in links:
    # link.name        -> tag name, e.g. "a"
    # link.get("href") -> value of the href attribute, or None when the
    #                     anchor has no href (link["href"] would raise KeyError)
    # link.get_text()  -> text content of the tag
    print(link.name, link.get("href"), link.get_text())

# The first image inside the content div, if there is one.
img = div_node.find("img")
if img is None:
    print("no <img> element found inside the content div")
else:
    print(img.get("src"))